# Check if the 'SPEI' and 'PDSI' packages are installed
if (!requireNamespace("SPEI", quietly = TRUE) | !requireNamespace("scPDSI", quietly = TRUE)) {
  stop("The 'SPEI' or 'PDSI' package is not installed. Please install it to proceed.")
} else {
  library(SPEI)
  library(scPDSI)
}
library(dplyr)
library(readxl)
library(parallel)

setwd("/discover/nobackup/myang5/VACS/SIMPLE_DISCOVER/")
col_types <- c("numeric", "numeric", "numeric", "numeric", "numeric", "numeric", "text", "text", "numeric", "numeric")
SIMPLE_grid_data <- read_excel("Input/Map/SIMPLE-All_Africa_Countries_List.xlsx", col_types = col_types)
file_list <- sort(list.files(path = "Output/output_withArid", pattern = "\\.csv$", full.names = TRUE))
file_list <- "Output/output_withArid/maize_baseline_ERA_5_Africa.csv"

# function read original daily weather, convert to monthly, and calculate pet
MonthlyWeatherReader=function(weatherFile, lat)
{
  #############  read weather 
  weather<-read.table(weatherFile,header=TRUE,skip=4)
  ### Next line only for future scenario AgMIP files that start at 1850, comment out otherwise
  colnames(weather)[1]="DATE"  #Rename first column
  weather$DATE<-as.numeric(gsub("^.{0,2}","",weather$DATE))  #Remove first 2 digits of date, 7 digit to 5 digit
  
  #############  this makes sure there are 5 characters for Date in weather data
  if(colnames(weather)[1]!="DATE"){colnames(weather)[1]="DATE"}
  
  #tranfer DOY to DATE
  weather$DATE=sprintf("%05d",as.numeric(weather$DATE))

  #############    remove letter if weather includes letter
  Dropletter=function(weather)
  {
    if(!is.numeric(weather)){
      if(!require(stringr)) {install.packages("stringr")}
      library(stringr)
      weather=lapply(str_extract_all(weather, "[^[:alpha:]]"),paste,collapse = "")%>%as.numeric(.) }
    return(weather)
  }
  
  weather$TMAX<-Dropletter(weather$TMAX)
  weather$TMIN<-Dropletter(weather$TMIN)
  weather$SRAD<-Dropletter(weather$SRAD)
  weather$RAIN<-Dropletter(weather$RAIN)
  weather$WIND<-Dropletter(weather$WIND)
  weather$TMED<-(weather$TMAX+weather$TMIN)/2
  
  weather_monthly <- weather %>%
    group_by(YYYY, MM) %>%
    summarize(
      PRCP = sum(RAIN, na.rm = TRUE),
      TMED = mean(TMED, na.rm = TRUE)
    )
  
  # calculate PET using thornthwaite approach
  weather_monthly$PET <- thornthwaite(weather_monthly$TMED, lat) 
  
  return(weather_monthly)
}

ComputeDI <- function(i) {
  file <- file_list[i]
  split_file <- strsplit(substring(file, 24, nchar(file)), split = "_")

  D7 = "X"
  if (split_file[[1]][2] == "future"){
    if (split_file[[1]][3] == "SSP126"){D5 <- "7"}
    else if (split_file[[1]][3] == "SSP370"){D5 <- "8"}
    else {stop("Unknown Scenario! Check Weather Path!")}
  }
  else if (split_file[[1]][2] == "historical"){D5 <- "8"}
  else if (split_file[[1]][2] == "baseline"){D5 <- "0"}
  else{stop("Unknown Time Period! Check Weather Path!")}
  
  # determine the 6th and 8th digit of Weather file based on Climate Model
  if (split_file[[1]][2] == "future"){
    if (split_file[[1]][4] == "GFDL-ESM4"){D6 <- "1"}
    else if (split_file[[1]][4] == "IPSL-CM6A-LR"){D6 <- "2"}
    else if (split_file[[1]][4] == "MPI-ESM1-2-HR"){D6 <- "3"}
    else if (split_file[[1]][4] == "MRI-ESM2-0"){D6 <- "4"}
    else {stop("Unknown Climate Model! Check Weather Path!")}
    D8 <- "1"
  }
  else if (split_file[[1]][2] == "historical"){
    if (split_file[[1]][4] == "GFDL-ESM4"){D6 <- "1"}
    else if (split_file[[1]][4] == "IPSL-CM6A-LR"){D6 <- "2"}
    else if (split_file[[1]][4] == "MPI-ESM1-2-HR"){D6 <- "3"}
    else if (split_file[[1]][4] == "MRI-ESM2-0"){D6 <- "4"}
    else {stop("Unknown Climate Model! Check Weather Path!")}
    D8 <- "1"
  }
  else if (split_file[[1]][2] == "baseline"){D6 <- "P"; D8 <- "X"} # W5E5 (bias-adjusted ERA-5)
  else{stop("Unknown Time Period! Check Weather Path!")}

  sim_results <- read.csv(file,header=FALSE,sep=",",stringsAsFactors=FALSE) # model outputs
  sim_results <- sim_results[-1,]
  colnames(sim_results) <- c("Exp.","Species.","Trt.","row","col","long","lat","SowingDate","Duration","Biomass","Yield","MaturityDay","HeatDays","ARIDDays","SowingYear","MaturityYear")
  line_num <- 1 #initial line number
  final_ids <- unique(sim_results[,1])
  for (each_id in final_ids) {
    weatherID <- paste0(SIMPLE_grid_data[which(SIMPLE_grid_data[, 1] == each_id), 7], D5, D6, D7, D8)
    if (split_file[[1]][2] == "baseline") {weatherFile <- paste0('Gridcell Weather/baseline/',weatherID,'.AgMIP')}
    else if (split_file[[1]][2] == "historical") {weatherFile <- paste0('Gridcell Weather/historical/',split_file[[1]][4],'/',weatherID,'.AgMIP')}
    else if (split_file[[1]][2] == "future") {weatherFile <- paste0('Gridcell Weather/future/',split_file[[1]][3],'/',split_file[[1]][4],'/',weatherID,'.AgMIP')}
    
    sim_each_id <- sim_results[sim_results[, 1] == each_id,]
    lat <- unique(sim_each_id$lat)
    weather_monthly <- MonthlyWeatherReader(weatherFile, as.numeric(lat))
    pdsi_n <- pdsi(weather_monthly$PRCP, weather_monthly$PET, AWC = 100, start = NULL, end = NULL, cal_start = NULL, cal_end = NULL, sc = TRUE)
    weather_monthly$PDSI <- pdsi_n$X
    for (i in 1:dim(sim_each_id)[1]) {
      num_months <- ceiling(as.numeric(sim_each_id[i,9])/30)
      harvest_month <- as.numeric(format(as.Date(sim_each_id[i,12]), "%m"))
      harvest_year <- sim_each_id[i,16]
      suppressMessages(suppressWarnings({spi_n <- spi(weather_monthly$PRCP, num_months)}))
      suppressMessages(suppressWarnings({spei_n <- spei(weather_monthly$PRCP-weather_monthly$PET, num_months)}))
      weather_monthly$SPI <- spi_n$fitted
      weather_monthly$SPEI <- spei_n$fitted
      sim_results$SPI[line_num] <- weather_monthly[weather_monthly$YYYY == harvest_year & weather_monthly$MM == harvest_month,]$SPI
      sim_results$SPEI[line_num] <- weather_monthly[weather_monthly$YYYY == harvest_year & weather_monthly$MM == harvest_month,]$SPEI
      sim_results$PDSI[line_num] <- mean(weather_monthly[weather_monthly$YYYY == harvest_year & weather_monthly$MM %in% (harvest_month - num_months + 1):harvest_month,]$PDSI)
      line_num <- line_num + 1
    }
  }
  write.csv(sim_results,paste0("Output/output_withDI/",substring(file, 24, nchar(file) - 4),"_DI.csv"),row.names = F)
}

t1=Sys.time() 
x=1:length(file_list)
no_cores <- detectCores() - 1
if (no_cores > length(file_list)) {
  activated_cores <- length(file_list)
} else {
  activated_cores <- no_cores
}
print(paste0("activated core #: ",activated_cores))
cl <- makeCluster(mc <- getOption("cl.cores", activated_cores))
clusterExport(cl, c("SIMPLE_grid_data", "file_list", "MonthlyWeatherReader"))
clusterEvalQ(cl, {
  library(dplyr)
  library(SPEI)
  library(scPDSI)
})
results <- parLapply(cl,x,ComputeDI) 
stopCluster(cl)
closeAllConnections()
Sys.time()-t1
###########
end_time <- Sys.time()
print(end_time - start_time)
